Here we'll try very hard to overfit, using deliberately silly models.
Run this notebook only after the preprocessing section of the dogs_vs_cats_with_AlexNet notebook has been executed.
In [19]:
from __future__ import division, print_function
from matplotlib import pyplot as plt
%matplotlib inline
import bcolz
import numpy as np
import pandas as pd
import os
In [2]:
import theano
import keras
In [3]:
from keras import backend as K
from keras.models import Sequential
from keras.layers.core import Dense, Dropout, Flatten, Lambda, Activation
from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import SGD
from keras.preprocessing import image
from keras.layers.core import Layer
from keras.layers import merge
from keras.callbacks import CSVLogger
In [4]:
IMAGE_HEIGHT, IMAGE_WIDTH = 227, 227  # AlexNet-style input resolution used throughout
In [5]:
def load_array(fname):
    """Load a bcolz-persisted array from disk fully into memory as a numpy array."""
    return bcolz.open(fname)[:]

# Per-pixel training-set mean as saved by the preprocessing notebook
# (presumably HWC layout, given the transpose below — confirm against saver).
img_mean = load_array('input/img_mean.bz')

# Precompute the float32 channel-first version ONCE: the original recomputed
# astype + transpose on every call, i.e. on every batch inside the Lambda layer.
_img_mean_chw = img_mean.astype(np.float32).transpose([2, 0, 1])

def center(img):
    """Subtract the dataset mean from a channel-first image (broadcasts over a batch)."""
    return img - _img_mean_chw
In [9]:
# Silliest possible model: mean-centering, flatten, then a single softmax
# layer mapping all 3*227*227 pixels directly onto the 2 classes.
linear = Sequential([
    Lambda(center, input_shape=(3, IMAGE_HEIGHT, IMAGE_WIDTH), output_shape=(3, IMAGE_HEIGHT, IMAGE_WIDTH)),
    Flatten(),
    Dense(2, activation='softmax')
])
In [10]:
linear.summary()  # per-layer shapes and parameter counts
With ~309k parameters for only 23k images, it shouldn't be too hard for the model to memorize the training set...
In [6]:
def get_batches(dirname, gen=None, shuffle=True, batch_size=4, class_mode='categorical',
                target_size=(IMAGE_HEIGHT, IMAGE_WIDTH)):
    """Return an iterator of (image, label) batches read from `dirname`.

    Fix: the original default `gen=image.ImageDataGenerator()` was evaluated
    once at definition time, so every call silently shared one generator
    instance. `None` now means "build a fresh generator per call"; callers
    that pass their own `gen` are unaffected.
    """
    if gen is None:
        gen = image.ImageDataGenerator()
    return gen.flow_from_directory(dirname, target_size=target_size,
                                   class_mode=class_mode, shuffle=shuffle,
                                   batch_size=batch_size)
In [7]:
def fit_model(model, batches, val_batches, nb_epoch=1, verbose=1, callbacks=None):
    """Train `model` on generator `batches`, validating on `val_batches`.

    Improvement: return the Keras History object instead of discarding it
    (backward compatible — existing callers simply ignore the return value).
    """
    return model.fit_generator(
        batches, batches.n // batches.batch_size, epochs=nb_epoch,
        callbacks=callbacks, validation_data=val_batches,
        validation_steps=val_batches.n // val_batches.batch_size,
        verbose=verbose)
In [8]:
# Directory layout produced by the preprocessing notebook.
train_path = 'input/train'
valid_path = 'input/valid'
test_path = 'input/test'
In [17]:
# Batches of 2000 images each from the train and valid directories.
batches = get_batches(train_path, batch_size=2000)
val_batches = get_batches(valid_path, batch_size=2000)
In [18]:
# Plain SGD with Nesterov momentum; per-epoch metrics go to a CSV log.
sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
linear.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
csv_logger = CSVLogger('training_linear.log')
In [19]:
# NOTE(review): the original author flagged that `batches`/`val_batches` were
# "wrongly named - inverted", yet above they are built from the train/valid
# directories respectively — verify which directories actually hold which
# split before trusting the resulting curves.
fit_model(linear, batches, val_batches, nb_epoch=20, callbacks=[csv_logger], verbose=1)
In [23]:
training_results = pd.read_csv('training_linear.log')  # epoch-level metrics written by CSVLogger
In [24]:
# Plot the training-accuracy curve logged for the linear model.
plt.style.use('ggplot')
plt.rcParams.update({'font.size': 22})
ax = training_results['acc'].plot(figsize=(15, 10))
ax.set_ylim(0, 1)
ax.set_xlabel('Epoch')
ax.set_ylabel('Training Accuracy')
Out[24]:
In [9]:
# One hidden layer of 500 ReLU units between the flattened pixels and the softmax.
two_layers = Sequential([
    Lambda(center, input_shape=(3, IMAGE_HEIGHT, IMAGE_WIDTH), output_shape=(3, IMAGE_HEIGHT, IMAGE_WIDTH)),
    Flatten(),
    Dense(500, activation='relu'),
    Dense(2, activation='softmax')
])
In [25]:
two_layers.summary()  # per-layer shapes and parameter counts
In [22]:
# Higher learning rate (0.05) for the first training phase of this model.
sgd5 = SGD(lr=0.05, decay=1e-6, momentum=0.9, nesterov=True)
two_layers.compile(optimizer=sgd5, loss='categorical_crossentropy', metrics=['accuracy'])
csv_logger = CSVLogger('training_two_layers.log')
In [26]:
# NOTE(review): reads from `valid_path`, not `train_path` — presumably deliberate
# (overfit a small set on purpose), but confirm against the author's intent.
small_batches = get_batches(valid_path, batch_size=64)
In [27]:
# Phase 1: 20 epochs at lr=0.05 on the small (validation-directory) set.
two_layers.fit_generator(small_batches, small_batches.n//small_batches.batch_size, epochs=20, callbacks = [csv_logger])
Out[27]:
In [37]:
small_batches = get_batches(valid_path, batch_size=256)  # bigger batches for phase 2
In [29]:
# Recompile with `sgd` (lr=0.01, defined earlier) to lower the learning rate.
two_layers.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
In [30]:
# Phase 2: 20 more epochs at the lower learning rate, logged to a separate file.
two_layers.fit_generator(small_batches, small_batches.n//small_batches.batch_size, epochs=20,
                         callbacks = [CSVLogger('training_two_layers_part_two.log')])
Out[30]:
In [54]:
# Pull the learned weights/biases out of the two Dense layers
# (indices 2 and 3; index 0 is the Lambda, index 1 the Flatten).
w1 = two_layers.layers[2].get_weights()[0]
b1 = two_layers.layers[2].get_weights()[1]
w2 = two_layers.layers[3].get_weights()[0]
b2 = two_layers.layers[3].get_weights()[1]
def save_array(fname, arr):
    """Persist `arr` to disk as a bcolz carray rooted at `fname` (overwrites)."""
    bcolz.carray(arr, rootdir=fname, mode='w').flush()
def create_dir(path):
    """Create `path` (including parents); succeed silently if it already exists.

    Bug fix: the original compared `e.errno` to `errno.EEXIST`, but the
    `errno` module was never imported anywhere in this notebook, so any
    OSError turned into a NameError. Checking `os.path.isdir` after the
    failure is equivalent and needs no extra import.
    """
    try:
        os.makedirs(path)
    except OSError:
        # Re-raise real failures (permissions, a file in the way, ...).
        if not os.path.isdir(path):
            raise
# Checkpoint the dense-layer weights to disk so training can resume later.
create_dir('model_weights')
save_array('model_weights/w1.bz', w1)
save_array('model_weights/b1.bz', b1)
save_array('model_weights/w2.bz', w2)
save_array('model_weights/b2.bz', b2)
In [11]:
# Restore the checkpointed weights into a freshly built `two_layers` model.
w1 = load_array('model_weights/w1.bz')
b1 = load_array('model_weights/b1.bz')
w2 = load_array('model_weights/w2.bz')
b2 = load_array('model_weights/b2.bz')
two_layers.layers[2].set_weights((w1, b1))
two_layers.layers[3].set_weights((w2, b2))
In [12]:
# Phase 3 setup: bigger batches and a lower learning rate.
# NOTE: this rebinds `sgd`, shadowing the earlier lr=0.01 optimizer.
small_batches = get_batches(valid_path, batch_size=500)
sgd = SGD(lr=0.005, decay=1e-6, momentum=0.9, nesterov=True)
two_layers.compile(optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy'])
In [13]:
# Phase 3: a long run (200 epochs) to push toward full memorization.
two_layers.fit_generator(small_batches, small_batches.n//small_batches.batch_size, epochs=200,
                         callbacks = [CSVLogger('training_two_layers_part_three.log')])
In [16]:
# Stitch the three training phases into one continuous metrics history.
log_names = ['training_two_layers.log',
             'training_two_layers_part_two.log',
             'training_two_layers_part_three.log']
frames = [pd.read_csv(name) for name in log_names]
training_results = pd.concat(frames).reset_index(drop=True)
In [17]:
print(training_results.shape)  # row count = total epochs across all three phases
training_results.head()
Out[17]:
In [22]:
# Accuracy curve across the concatenated two-layer training phases.
plt.style.use('ggplot')
plt.rcParams.update({'font.size': 22})
axes = training_results['acc'].plot(figsize=(15, 10))
axes.set_ylim(0, 1)
axes.set_xlabel('Epoch')
axes.set_ylabel('Training Accuracy')
Out[22]:
In [ ]: